package com.meiyuetao.myt.crawl.filter;

import java.util.Map;

import lab.s2jh.crawl.filter.ParseFilterChain;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.google.common.collect.Maps;

/**
 * Parse filter for single-commodity pages of the BeiBei site.
 *
 * <p>
 * Only {@link #parseSimpleData(String)} currently does real work: it fetches
 * the page and extracts the displayed sale price. The full commodity parse
 * ({@link #doFilterInternal(String, ParseFilterChain)}) is not implemented.
 */
public class BeiBeiSingleParseFilter extends AbstractCommodityParseFilter {

    private static final Logger logger = LoggerFactory.getLogger(BeiBeiSingleParseFilter.class);

    // Not referenced by any active code in this class.
    private static final int MAX_REVIEW_PAGES = 5;

    // Only referenced by the disabled full-parse draft (see doFilterInternal).
    private static final String[] ALERT_DOMAIN_INFOS = { "beibei", "jd.com", "360buy.com", "3.cn" };

    /** XPath of the element that holds the displayed sale price. */
    private static final String SALE_PRICE_XPATH = "//DIV[@id='detail-meta']//DIV[@class='over-price']//SPAN[@class='pink']//EM[@class='price']";

    /**
     * Intentionally a no-op.
     *
     * <p>
     * A disabled draft used to live here as commented-out code. It targeted
     * JD.com markup (source type "jd.com", XPaths such as
     * {@code //DIV[@id='summary-price']//STRONG[@id='jd-price']}) and did the
     * following: extract the commodity code from the URL, collect the
     * breadcrumb category path, load or create the ParseCommodity for the URL,
     * parse title, sale price, description, showcase images, promotion slogan
     * and stock, stamp the last fetch time and save the entity. It must be
     * re-written against BeiBei's page structure before being enabled; see
     * version control history for the original text.
     *
     * @param url
     *            page URL handed to this filter (unused)
     * @param filterChain
     *            the surrounding filter chain (unused; it is not advanced
     *            here)
     */
    @Override
    public void doFilterInternal(String url, ParseFilterChain filterChain) {
        // Full commodity parsing is not implemented for BeiBei yet.
    }

    /**
     * Fetches the page at {@code url} and extracts its sale price.
     *
     * @param url
     *            page URL to parse
     * @return an insertion-ordered map with the single key {@code "salePrice"}
     *         holding the price text without a leading currency symbol, or
     *         {@code null} when the URL is not accepted by this filter, the
     *         page could not be obtained, or no price text was found
     */
    @Override
    public Map<String, Object> parseSimpleData(String url) {
        if (!isAcceptUrl(url)) {
            return null;
        }

        HtmlPage htmlPage = fetchHtmlPage(url);
        if (htmlPage == null) {
            // Defensive: avoid a NullPointerException should the inherited
            // fetch helper yield no page.
            return null;
        }

        HtmlElement salePriceNode = htmlPage.getFirstByXPath(SALE_PRICE_XPATH);
        String rawPrice = salePriceNode == null ? "" : salePriceNode.asText();

        String salePrice = normalizeSalePrice(cleanInvisibleChar(rawPrice));
        if (salePrice == null) {
            return null;
        }

        Map<String, Object> jsonMap = Maps.newLinkedHashMap();
        jsonMap.put("salePrice", salePrice);
        return jsonMap;
    }

    /**
     * Trims the price text and drops one leading non-digit character
     * (typically a currency symbol).
     *
     * <p>
     * The check and the removal both operate on the trimmed text, so leading
     * whitespace can no longer cause the whitespace to be removed instead of
     * the symbol.
     *
     * @param text
     *            raw price text, may be {@code null} or blank
     * @return the normalized price text, or {@code null} when nothing usable
     *         remains
     */
    private static String normalizeSalePrice(String text) {
        if (StringUtils.isBlank(text)) {
            return null;
        }
        String price = text.trim();
        char first = price.charAt(0);
        boolean startsWithAsciiDigit = first >= '0' && first <= '9';
        if (!startsWithAsciiDigit) {
            // Remove the symbol, then any whitespace between it and the digits.
            price = price.substring(1).trim();
        }
        return price.isEmpty() ? null : price;
    }

}
